# Load packages
library(here)
library(dplyr)
library(readr)
library(rstan)
library(bayesrules)
library(tidyverse)
library(bayesplot)
library(rstanarm)
library(janitor)
library(tidybayes)
library(broom.mixed)
library(here)
library(sf)
library(tidycensus)
library(openxlsx)
library(s2)
library(nycgeo)
library(CARBayes)
library(spData)
library(spdep)
# Join the NTA polygons to their ACS attributes and reproject to EPSG:4269.
# NOTE(review): `nta_sf` / `nta_acs_data` are not defined in this script;
# presumably they are datasets exported by nycgeo -- confirm.
nyc_join <- st_transform(merge(nta_sf, nta_acs_data), 4269)
# Distinct county names in the joined layer; used to scope the ACS request.
county_list <- unique(pull(nyc_join, county_name))
# Authenticate against the Census API without embedding the key in source.
# The key is read from the CENSUS_API_KEY environment variable (e.g. set in
# ~/.Renviron). The key that was previously hard-coded on this line has been
# exposed and should be revoked and replaced.
census_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(census_key)) {
  stop(
    "CENSUS_API_KEY is not set; add it to ~/.Renviron before running.",
    call. = FALSE
  )
}
census_api_key(census_key)

# Tract-level Gini index (variable B19083_001) from the 2019 5-year ACS for
# the counties in `county_list`, with tract geometries. Wide output yields one
# estimate column (suffix "E") and one margin-of-error column (suffix "M") per
# variable: the margin, NAME and GEOID columns are dropped and the estimate is
# renamed to plain `gini_inequality`. Result is reprojected to EPSG:4269.
census_data <- get_acs(
  state = "NY",
  county = county_list,
  geography = "tract",
  variables = c(gini_inequality = "B19083_001"),
  year = 2019,
  output = "wide",
  survey = "acs5",
  geometry = TRUE
) %>%
  dplyr::select(-c(GEOID, NAME, ends_with("M"))) %>%
  dplyr::rename(gini_inequality = gini_inequalityE) %>%
  st_transform(4269)
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============= | 18%
|
|============== | 20%
|
|=============== | 21%
|
|================ | 22%
|
|================ | 23%
|
|================== | 26%
|
|=================== | 27%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|========================= | 35%
|
|========================== | 37%
|
|============================ | 39%
|
|============================ | 40%
|
|=============================== | 45%
|
|================================= | 48%
|
|================================== | 48%
|
|======================================= | 55%
|
|========================================= | 58%
|
|=========================================== | 62%
|
|============================================= | 64%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|===================================================== | 76%
|
|======================================================= | 79%
|
|============================================================= | 87%
|
|============================================================== | 89%
|
|================================================================ | 92%
|
|==================================================================== | 98%
|
|======================================================================| 100%
# themes
theme_set(theme_minimal())
# The neighborhood layer is stored as a shapefile plus a companion CSV whose
# header row carries the column names to apply to it.
# NOTE(review): presumably needed because shapefile field names are truncated
# on write -- confirm.
vari_names <- read_csv(here("clean_data", "nyc_names.csv"))
nyc_clean <- st_read(
  here("clean_data", "nyc_data.shp"),
  crs = 4269,
  quiet = TRUE
)
# The rename below is purely positional, so fail loudly if the two files have
# drifted apart instead of silently mislabelling columns.
stopifnot(ncol(nyc_clean) == ncol(vari_names))
colnames(nyc_clean) <- colnames(vari_names)
library(openxlsx)
# Lookup from NTA code to borough name, one row per distinct pair.
nta_to_census <- here("ethnic", "Data", "census_to_nta.xlsx") %>%
  openxlsx::read.xlsx() %>%
  dplyr::select(BoroName, NTACode) %>%
  rename(
    borough = BoroName,
    nta_id = NTACode
  ) %>%
  unique()

# Attach the borough to every neighborhood and recode the numeric subway-access
# category as a factor: 1 = Poor, 2 = Limited, 3 = Satisfactory, and every
# other value (including NA) = Excellent. Factor levels follow that order.
nyc_clean <- merge(nyc_clean, nta_to_census, by = "nta_id") %>%
  mutate(
    transportation_desert_4cat = factor(
      case_when(
        transportation_desert_4cat == 1 ~ "Poor",
        transportation_desert_4cat == 2 ~ "Limited",
        transportation_desert_4cat == 3 ~ "Satisfactory",
        TRUE ~ "Excellent"
      ),
      levels = c("Poor", "Limited", "Satisfactory", "Excellent")
    )
  )
# Arrest points whose offense description contains "FELONY ASSAULT" but not
# "POLICE" and whose PD description contains a "1" or a "2"; only the arrest
# key and point geometry are kept, reprojected to EPSG:4269.
# NOTE(review): the path is absolute and machine-specific.
assault <- "/Users/freddy/Downloads/NYPD Arrest Data (Year to Date)/geo_export_a659754f-6263-4ba1-8a58-72dca5befa79.shp" %>%
  st_read() %>%
  filter(
    str_detect(ofns_desc, "FELONY ASSAULT"),
    !str_detect(ofns_desc, "POLICE"),
    str_detect(pd_desc, "2") | str_detect(pd_desc, "1")
  ) %>%
  dplyr::select(arrest_key, geometry) %>%
  st_transform(4269)
## Reading layer `geo_export_a659754f-6263-4ba1-8a58-72dca5befa79' from data source
## `/Users/freddy/Downloads/NYPD Arrest Data (Year to Date)/geo_export_a659754f-6263-4ba1-8a58-72dca5befa79.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 115299 features and 19 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.25184 ymin: 40.4994 xmax: -73.703 ymax: 40.91272
## Geodetic CRS: WGS84(DD)
# Arrest points whose offense description contains "SEX"; only the arrest key
# and point geometry are kept, reprojected to EPSG:4269.
# NOTE(review): the path is absolute and machine-specific.
sex <- "/Users/freddy/Downloads/NYPD Arrest Data (Year to Date)/geo_export_a659754f-6263-4ba1-8a58-72dca5befa79.shp" %>%
  st_read() %>%
  filter(str_detect(ofns_desc, "SEX")) %>%
  dplyr::select(arrest_key, geometry) %>%
  st_transform(4269)
## Reading layer `geo_export_a659754f-6263-4ba1-8a58-72dca5befa79' from data source
## `/Users/freddy/Downloads/NYPD Arrest Data (Year to Date)/geo_export_a659754f-6263-4ba1-8a58-72dca5befa79.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 115299 features and 19 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.25184 ymin: 40.4994 xmax: -73.703 ymax: 40.91272
## Geodetic CRS: WGS84(DD)
# Arrest counts per neighborhood.
# The left spatial join keeps a neighborhood that contains no arrest as a
# single row with a missing `arrest_key`, so counting rows with n() would
# report 1 for it. Counting non-missing keys yields 0 for those neighborhoods.
# The geometry column is intentionally kept in the resulting tibbles.
assault_neighborhood <- st_join(nyc_clean, assault, left = TRUE) %>%
  group_by(nta_id) %>%
  summarize(assault_count = sum(!is.na(arrest_key))) %>%
  as_tibble()
sex_neighborhood <- st_join(nyc_clean, sex, left = TRUE) %>%
  group_by(nta_id) %>%
  summarize(sexcrime_count = sum(!is.na(arrest_key))) %>%
  as_tibble()
# Neighborhood Gini index: the mean of the tract-level values of every census
# tract spatially joined to the neighborhood, ignoring missing tract values.
# NOTE(review): st_join() is called without a `join` predicate, so its default
# is used; tracts that merely touch a neighborhood boundary may be included --
# confirm this is intended.
gini_neighborhood <- st_join(nyc_clean, census_data, left = TRUE) %>%
  group_by(nta_id) %>%
  summarize(gini_neighborhood = mean(gini_inequality, na.rm = TRUE)) %>%
  as_tibble() %>%
  dplyr::select(nta_id, gini_neighborhood)
# Assault counts plus the neighborhood Gini index (stored as `gini` in the
# third column), without the geometry column.
assault_gini <- assault_neighborhood %>%
  left_join(gini_neighborhood, by = "nta_id") %>%
  rename(gini = gini_neighborhood) %>%
  relocate(gini, .before = 3) %>%
  dplyr::select(-geometry)
# Add the sex-crime counts (stored as `sex_crime_count` in the third column),
# again without the geometry column.
sex_assault_gini <- assault_gini %>%
  left_join(sex_neighborhood, by = "nta_id") %>%
  rename(sex_crime_count = sexcrime_count) %>%
  relocate(sex_crime_count, .before = 3) %>%
  dplyr::select(-geometry)
# Append the crime counts and Gini index to the neighborhood layer. The layer
# is temporarily converted to a plain tibble for the join, de-duplicated, and
# then turned back into an sf object.
nyc_clean <- nyc_clean %>%
  as_tibble() %>%
  filter(nta_id %in% sex_assault_gini$nta_id) %>%
  left_join(sex_assault_gini, by = "nta_id") %>%
  unique() %>%
  st_as_sf()
# Subway station points, reprojected to EPSG:4269.
subway_stations <- st_read(
  here("ethnic", "Data", "stations", "geo_export_85568705-efba-4456-bdc0-3d70ff2cf8e5.shp"),
  quiet = TRUE
) %>%
  st_transform(4269)
# Bus stop points, reprojected to EPSG:4269, excluding rows whose NAMELSAD
# contains "Richmond".
bus_stations <- st_read(
  here("ethnic", "Data", "bus", "bus_stops_nyc_may2020.shp"),
  quiet = TRUE
) %>%
  st_transform(4269) %>%
  filter(str_detect(NAMELSAD, "Richmond", negate = TRUE))
# Ridership points. `Position` is split on spaces into a label, a longitude and
# a latitude; the surrounding parentheses are stripped, both coordinates are
# converted to numbers, and the table becomes an sf point layer (EPSG:4269).
transit_points <- here("transit", "ridership_points.csv") %>%
  read_csv() %>%
  separate(Position, into = c("Point", "longitude", "latitude"), sep = " ") %>%
  mutate(
    longitude = as.numeric(str_remove_all(longitude, "[()]")),
    latitude = as.numeric(str_remove_all(latitude, "[)]"))
  ) %>%
  dplyr::select(-Point) %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4269)
# Shared styling for every NYC map: minimal theme without major grid lines or
# axis labels, a bold title of `title_size` points, 12-point legend text, and
# enlarged keys for any shape/colour legend.
# Returns a list of ggplot components that can be added to a plot with `+`.
nyc_map_theme <- function(title_size) {
  list(
    theme_minimal(),
    theme(
      panel.grid.major = element_line("transparent"),
      axis.text = element_blank(),
      plot.title = element_text(size = title_size, face = "bold"),
      legend.title = element_text(size = 12),
      legend.text = element_text(size = 12)
    ),
    guides(
      shape = guide_legend(override.aes = list(size = 8)),
      color = guide_legend(override.aes = list(size = 8))
    )
  )
}

# Choropleth of one neighborhood-level variable.
#   data       sf polygon layer to draw
#   fill_var   unquoted column (or expression of columns) mapped to fill
#   fill_scale ggplot2 fill scale, including its legend title
#   title      plot title
#   title_size title font size in points
nyc_choropleth <- function(data, fill_var, fill_scale, title, title_size) {
  ggplot(data) +
    geom_sf(aes(fill = {{ fill_var }}), color = "#8f98aa") +
    fill_scale +
    ggtitle(title) +
    nyc_map_theme(title_size)
}

# Two-colour gradient from the common pale base colour to `high`.
pale_gradient <- function(high, legend_title) {
  scale_fill_gradient(
    low = "#FCF5EE", high = high,
    guide = guide_legend(title = legend_title)
  )
}

#plot locations over map
subway_loc <- ggplot() +
  geom_sf(data = nyc_clean, fill = "#EBF6FF", color = "#D48DD8", size = 0.15, alpha = .8) +
  geom_sf(data = subway_stations, color = "#3F123C", size = 1) +
  coord_sf(datum = st_crs(subway_stations)) +
  ggtitle("Subway Stop Locations \nin NYC") +
  nyc_map_theme(30)
bus_loc <- ggplot() +
  geom_sf(data = nyc_clean, fill = "#EBF6FF", color = "#D48DD8", size = 0.15, alpha = .8) +
  geom_sf(data = bus_stations, color = "#3F123C", size = .5, alpha = .5) +
  coord_sf(datum = st_crs(subway_stations)) +
  ggtitle("Bus Stop Locations \nin NYC") +
  nyc_map_theme(30)

# Transit choropleths (missing values drawn in light grey).
stops <- nyc_choropleth(
  nyc_clean, sub_count,
  scale_fill_gradient(
    low = "lavender", high = "maroon",
    guide = guide_legend(title = "Number of Subway Stops"), na.value = "#D6D6D6"
  ),
  title = "Subway Stop Counts \nin NYC", title_size = 30
)
bus_stops <- nyc_choropleth(
  nyc_clean, bus_count,
  scale_fill_gradient(
    low = "lavender", high = "maroon",
    guide = guide_legend(title = "Number of Bus Stops"), na.value = "#D6D6D6"
  ),
  title = "Bus Stop Counts \nin NYC", title_size = 30
)
ridership <- nyc_choropleth(
  nyc_clean, log2(mean_ridership),
  scale_fill_gradient(
    low = "lavender", high = "maroon",
    guide = guide_legend(title = "Log2 Mean Ridership"), na.value = "#D6D6D6"
  ),
  title = "Mean (Log2) Subway Turnstile \nRidership in 2018 \nfor NYC", title_size = 30
)
access <- nyc_choropleth(
  nyc_clean, transportation_desert_4cat,
  scale_fill_manual(
    values = c("#a45371", "#e5b6c7", "#ebebf7", "#89a2d1"),
    guide = guide_legend(title = "Subway Accessibility Category"), na.value = "#D6D6D6"
  ),
  title = "Subway Deserts \nin NYC", title_size = 30
)

# Socioeconomic choropleths; each object is named after its high-end colour.
red <- nyc_choropleth(
  nyc_clean, below_poverty_line_count,
  pale_gradient("#E13728", "Number Below \nPoverty Line"),
  title = "Impoverishment", title_size = 26
)
yellow <- nyc_choropleth(
  nyc_clean, mean_income,
  pale_gradient("#F3D24E", "Mean Income"),
  title = "Mean Income", title_size = 26
)
teal <- nyc_choropleth(
  nyc_clean, mean_rent,
  pale_gradient("#2DBDC7", "Dollars"),
  title = "Mean Rent", title_size = 26
)
purple <- nyc_choropleth(
  nyc_clean, eviction_count,
  pale_gradient("#7826C0", "Number of Evictions"),
  title = "Evictions", title_size = 26
)
orange <- nyc_choropleth(
  nyc_clean, unemployment_count,
  pale_gradient("#FC9228", "Number on \nUnemployment"),
  title = "Unemployment", title_size = 26
)
green <- nyc_choropleth(
  nyc_clean, store_count,
  pale_gradient("#326902", "Number of Stores"),
  title = "Retail Food Stores", title_size = 26
)
blue <- nyc_choropleth(
  nyc_clean, school_count,
  pale_gradient("#5372C4", "Number of Schools"),
  title = "Number of Schools", title_size = 26
)
pink <- nyc_choropleth(
  nyc_clean, uninsured_count,
  pale_gradient("#F450E1", "Number of People \n without Insurance Coverage"),
  title = "Insurance Coverage", title_size = 26
)
navy <- nyc_choropleth(
  nyc_clean, gini,
  scale_fill_gradient(
    low = "#F8E3DD", high = "#16236f",
    guide = guide_legend(title = "Gini Inequality Values")
  ),
  title = "Income Inequality", title_size = 25
)

# Population choropleths (four-stop gradients).
white <- nyc_choropleth(
  nyc_clean, white_count,
  scale_fill_gradientn(
    colors = c("#FCF5EE", "#BD9DA5", "#9C7080", "#7B435B"),
    guide = guide_legend(title = "Number White")
  ),
  title = "White Population", title_size = 24
)
black <- nyc_choropleth(
  nyc_clean, black_count,
  scale_fill_gradientn(
    colors = c("#FCF5EE", "#F8ABA6", "#F58581", "#F25F5C"),
    guide = guide_legend(title = "Number Black")
  ),
  title = "Black Population", title_size = 24
)
asian <- nyc_choropleth(
  nyc_clean, asian_count,
  scale_fill_gradientn(
    colors = c("#FCF5EE", "#B8BAD9", "#959CCE", "#717EC3"),
    guide = guide_legend(title = "Number Asian")
  ),
  title = "Asian Population", title_size = 24
)
latinx <- nyc_choropleth(
  nyc_clean, latinx_count,
  scale_fill_gradientn(
    colors = c("#FCF5EE", "#FDC894", "#FDB166", "#FC9A38"),
    guide = guide_legend(title = "Number Latinx")
  ),
  title = "Latinx Population", title_size = 24
)
pop <- nyc_choropleth(
  nyc_clean, total_pop,
  scale_fill_gradientn(
    colors = c("#FCF5EE", "#C0E3C3", "#A1D9AD", "#81CF97"),
    guide = guide_legend(title = "Number of People")
  ),
  title = "Total Population", title_size = 26
)
All the data used in this project are from two major sources: the Tidycensus package and NYC Open Data.
Tidycensus is an R package, developed by Kyle Walker and Matt Herman, that provides easy access to the US Census Bureau’s data APIs and returns tidyverse-ready data frames from several major US Census Bureau datasets. Our demographic and socioeconomic data are drawn from the American Community Survey results available through the tidycensus package. A summary of our ACS data variables is below:
- borough: Each Neighborhood’s Borough
- total_pop: Total Population by Neighborhood
- mean_income: Mean Income by Neighborhood
- below_poverty_line_count: Number of People Living Below the 100% Poverty Line by Neighborhood
- mean_rent: Mean Rent by Neighborhood
- unemployment_count: Number of People on Unemployment by Neighborhood
- latinx_count: Number of Latinx People by Neighborhood
- white_count: Number of White People by Neighborhood
- black_count: Number of Black People by Neighborhood
- native_count: Number of Native People by Neighborhood
- asian_count: Number of Asian People by Neighborhood
- naturalized_citizen_count: Number of Naturalized Citizens by Neighborhood
- noncitizen_count: Number of Foreign Born People by Neighborhood
- uninsured_count: Number of Uninsured Citizens of any Age by Neighborhood

For the remaining predictors, we used NYC Open Data’s portal to identify specific predictors. In particular, we used geotagged locations of Subway Stops, Bus Stops, Grocery Stores, Schools, and Eviction Sites from the Departments of Transportation, Health, Education, and Housing to calculate the neighborhood-specific variables described below:
- school_count: Number of Public Schools by Neighborhood
- eviction_count: Number of Evictions by Neighborhood
- store_count: Number of Grocery Stores and Food Vendors by Neighborhood
- sub_count: Number of Subway Stations by Neighborhood
- bus_count: Number of Bus Stations by Neighborhood
- perc_covered_by_transit: Percent of Neighborhood Within Walking Distance (.25 miles) of Any Subway Stop
- transportation_desert_4cat: Subway Accessibility by Neighborhood (Poor, Limited, Satisfactory, Excellent)

Lastly, we acquired subway ridership from the Metropolitan Transportation Authority’s turnstile data for the week of September 7, 2019. For each station, the entries and exits at each turnstile are recorded. We then aggregated this information by taking the station-specific average of subway ridership across the 7 days in the week. Finally, we geotagged each listed station, then took the mean of ridership at all subway stations in each neighborhood to create:
mean_ridership: Mean Subway Ridership by Neighborhood for the week of September 7th.
Our data has 224 observations of 26 variables. Below is a preview of our dataset with column names attached.
library(kableExtra)
# Scrollable, styled preview of the first three rows of the dataset.
nyc_clean %>%
  head(n = 3) %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "200px")
| nta_id | total_pop | mean_income | below_poverty_line_count | below_poverty_line_and_50_count | mean_rent | unemployment_count | latinx_count | white_count | black_count | native_count | asian_count | naturalized_citizen_count | noncitizen_count | uninsured_count | school_count | eviction_count | store_count | sub_count | bus_count | mean_ridership | perc_covered_by_transit | transportation_desert_4cat | borough | geometry | assault_count | sex_crime_count | gini |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BK0101 | 26308 | 98338.67 | 2397 | 1289 | 2062.667 | 582 | 3284 | 20526 | 482 | 40 | 1052 | 3777 | 3129 | 1797 | 6 | 68 | 71 | 2 | 53 | 9410.500 | 78.76390 | Satisfactory | Brooklyn | MULTIPOLYGON (((-73.94074 4… | 22 | 7 | 0.4420550 |
| BK0102 | 57774 | 101238.92 | 9120 | 4474 | 2330.077 | 1710 | 18227 | 32237 | 1460 | 0 | 4008 | 6802 | 7746 | 3725 | 12 | 204 | 129 | 2 | 97 | 26603.000 | 89.80852 | Satisfactory | Brooklyn | MULTIPOLYGON (((-73.96355 4… | 14 | 14 | 0.4859323 |
| BK0103 | 36891 | 30309.25 | 18285 | 5970 | 1194.875 | 457 | 3351 | 31799 | 1288 | 20 | 194 | 3548 | 1012 | 711 | 6 | 45 | 58 | 3 | 35 | 6348.667 | 88.13439 | Satisfactory | Brooklyn | MULTIPOLYGON (((-73.96762 4… | 7 | 1 | 0.4937143 |
Below is a numeric summary of each variable’s distribution.
#summary(nyc_clean)
library(table1)
# Per-borough descriptive statistics for the selected variables, converted to
# a tibble so it can be rendered with kable.
table_print <- as_tibble(
  table1(
    ~ total_pop + mean_income + below_poverty_line_count +
      mean_rent + unemployment_count + white_count + uninsured_count +
      school_count + eviction_count + store_count +
      transportation_desert_4cat + sub_count + bus_count +
      mean_ridership | borough,
    data = as_tibble(nyc_clean)
  )
)
# NOTE(review): the group sizes in these headers are hard-coded and will go
# stale if the data changes.
names(table_print) <- c(
  "Variable",
  "Bronx (N=44)", "Brooklyn (N=64)", "Manhattan (N=39)", "Queens (N=77)",
  "Overall (N=224)"
)
# Drop rows with an empty label before rendering as a scrollable table.
table_print %>%
  filter(Variable != "") %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "100%", height = "500px")
| Variable | Bronx (N=44) | Brooklyn (N=64) | Manhattan (N=39) | Queens (N=77) | Overall (N=224) |
|---|---|---|---|---|---|
| total_pop | |||||
| Mean (SD) | 30200 (18700) | 37800 (24600) | 38300 (24600) | 25900 (20900) | 32300 (22800) |
| Median [Min, Max] | 29800 [0, 69200] | 38300 [0, 97800] | 35700 [0, 95300] | 25000 [0, 87700] | 31600 [0, 97800] |
| mean_income | |||||
| Mean (SD) | 43400 (17900) | 69600 (28700) | 103000 (49700) | 74500 (14400) | 71900 (33900) |
| Median [Min, Max] | 38000 [23100, 94200] | 61200 [27400, 148000] | 108000 [33300, 212000] | 72600 [37500, 104000] | 67200 [23100, 212000] |
| Missing | 8 (18.2%) | 9 (14.1%) | 6 (15.4%) | 19 (24.7%) | 42 (18.8%) |
| below_poverty_line_count | |||||
| Mean (SD) | 8360 (6490) | 7430 (6120) | 6180 (6100) | 3090 (2900) | 5900 (5700) |
| Median [Min, Max] | 7260 [0, 21600] | 6880 [0, 28800] | 3290 [0, 22800] | 2680 [0, 11600] | 4220 [0, 28800] |
| mean_rent | |||||
| Mean (SD) | 1230 (197) | 1580 (465) | 1960 (674) | 1650 (198) | 1600 (465) |
| Median [Min, Max] | 1240 [833, 1620] | 1450 [792, 3280] | 2070 [884, 3270] | 1630 [1140, 2250] | 1510 [792, 3280] |
| Missing | 8 (18.2%) | 9 (14.1%) | 6 (15.4%) | 19 (24.7%) | 42 (18.8%) |
| unemployment_count | |||||
| Mean (SD) | 1400 (972) | 1180 (903) | 1210 (1100) | 756 (685) | 1080 (916) |
| Median [Min, Max] | 1330 [0, 3150] | 1120 [0, 3770] | 952 [0, 4700] | 668 [0, 3150] | 919 [0, 4700] |
| white_count | |||||
| Mean (SD) | 2830 (4990) | 13900 (14200) | 17200 (14700) | 6740 (8500) | 9850 (12300) |
| Median [Min, Max] | 919 [0, 27500] | 10900 [0, 64500] | 13800 [0, 69300] | 4200 [0, 43900] | 4960 [0, 69300] |
| uninsured_count | |||||
| Mean (SD) | 2570 (1990) | 2750 (2260) | 2030 (2150) | 2350 (2510) | 2450 (2280) |
| Median [Min, Max] | 2340 [0, 8030] | 2610 [0, 10100] | 1380 [0, 10300] | 1660 [0, 12200] | 1910 [0, 12200] |
| school_count | |||||
| Mean (SD) | 8.64 (7.44) | 8.16 (6.79) | 8.54 (7.79) | 4.08 (2.88) | 6.92 (6.41) |
| Median [Min, Max] | 5.50 [1.00, 27.0] | 6.00 [1.00, 31.0] | 5.00 [1.00, 28.0] | 3.00 [1.00, 12.0] | 5.00 [1.00, 31.0] |
| eviction_count | |||||
| Mean (SD) | 438 (340) | 245 (234) | 223 (233) | 126 (131) | 238 (255) |
| Median [Min, Max] | 406 [1.00, 1130] | 163 [1.00, 829] | 152 [1.00, 1120] | 93.0 [1.00, 521] | 148 [1.00, 1130] |
| store_count | |||||
| Mean (SD) | 53.2 (38.6) | 69.8 (49.5) | 58.6 (39.8) | 33.1 (33.8) | 52.0 (43.1) |
| Median [Min, Max] | 48.5 [1.00, 138] | 70.5 [1.00, 202] | 54.0 [1.00, 151] | 24.0 [1.00, 147] | 45.0 [1.00, 202] |
| transportation_desert_4cat | |||||
| Poor | 4 (9.1%) | 8 (12.5%) | 2 (5.1%) | 40 (51.9%) | 54 (24.1%) |
| Limited | 16 (36.4%) | 16 (25.0%) | 2 (5.1%) | 22 (28.6%) | 56 (25.0%) |
| Satisfactory | 11 (25.0%) | 18 (28.1%) | 11 (28.2%) | 12 (15.6%) | 52 (23.2%) |
| Excellent | 13 (29.5%) | 22 (34.4%) | 24 (61.5%) | 3 (3.9%) | 62 (27.7%) |
| sub_count | |||||
| Mean (SD) | 1.95 (1.33) | 2.77 (2.06) | 4.03 (3.53) | 1.55 (1.22) | 2.41 (2.23) |
| Median [Min, Max] | 1.00 [1.00, 7.00] | 2.00 [1.00, 9.00] | 3.00 [1.00, 17.0] | 1.00 [1.00, 6.00] | 1.00 [1.00, 17.0] |
| bus_count | |||||
| Mean (SD) | 40.1 (22.9) | 60.2 (41.1) | 46.6 (26.0) | 56.9 (45.0) | 52.7 (38.0) |
| Median [Min, Max] | 43.5 [1.00, 125] | 59.0 [1.00, 170] | 44.0 [2.00, 106] | 52.0 [1.00, 243] | 49.0 [1.00, 243] |
| mean_ridership | |||||
| Mean (SD) | 7180 (3410) | 7400 (4690) | 22900 (19500) | 10900 (12200) | 12000 (13300) |
| Median [Min, Max] | 6670 [2420, 15700] | 6610 [1040, 26600] | 18000 [5640, 110000] | 7920 [273, 55700] | 7960 [273, 110000] |
| Missing | 21 (47.7%) | 18 (28.1%) | 7 (17.9%) | 54 (70.1%) | 100 (44.6%) |
library(ggridges)

# Ridgeline density of one variable, one ridge per borough.
#   data   data frame with a `borough` column
#   x_var  unquoted column whose distribution is drawn
#   title  plot title
# The transparency is set on the geom: an `alpha` argument given to ggplot()
# itself is not used by any layer.
ridge_by_borough <- function(data, x_var, title) {
  ggplot(data, aes(x = {{ x_var }}, y = borough, fill = borough)) +
    geom_density_ridges(alpha = .6) +
    labs(title = title, y = "") +
    theme(
      panel.grid.major = element_line("transparent"),
      axis.text.y.left = element_text(size = 16, face = "bold"),
      plot.title = element_text(size = 28, hjust = .5, face = "bold"),
      legend.position = "none"
    ) +
    scale_fill_manual(values = c("#e09f3e", "#16bac5", "#717ec3", "#5da271"))
}

plot_1 <- ridge_by_borough(nyc_clean, mean_income, "Mean Income")
plot_2 <- ridge_by_borough(nyc_clean, below_poverty_line_count, "Number Below Poverty Line")
plot_3 <- ridge_by_borough(nyc_clean, mean_rent, "Mean Rent")
plot_4 <- ridge_by_borough(nyc_clean, unemployment_count, "Unemployed Counts")
plot_5 <- ridge_by_borough(nyc_clean, white_count, "White Counts")
plot_6 <- ridge_by_borough(nyc_clean, uninsured_count, "Uninsured Counts")
plot_7 <- ridge_by_borough(nyc_clean, school_count, "School Counts")
plot_8 <- ridge_by_borough(nyc_clean, eviction_count, "Eviction Counts")
plot_9 <- ridge_by_borough(nyc_clean, store_count, "Food Retail Counts")

# Share of neighborhoods in each subway-accessibility category, per borough.
# Bars are stacked to a 0-1 proportion; the breaks are stated explicitly so the
# five percentage labels always line up with them.
plot_10 <- nyc_clean %>%
  ggplot(aes(x = borough, fill = transportation_desert_4cat)) +
  geom_bar(position = "fill", alpha = .6) +
  scale_y_continuous(
    breaks = seq(0, 1, by = 0.25),
    labels = seq(0, 100, by = 25)
  ) +
  labs(title = "Subway Accessibility", y = "", x = "") +
  theme(
    panel.grid.major = element_line("transparent"),
    axis.text.x.bottom = element_text(size = 16, face = "bold"),
    plot.title = element_text(size = 28, hjust = .5, face = "bold")
  ) +
  scale_fill_manual(
    values = c("#a45371", "#e5b6c7", "#ebebf7", "#89a2d1"),
    guide = guide_legend(title = "Subway Accessibility"), na.value = "#D6D6D6"
  )

plot_11 <- ridge_by_borough(nyc_clean, sub_count, "Subway Stop Counts")
plot_12 <- ridge_by_borough(nyc_clean, bus_count, "Bus Stop Counts")
plot_13 <- ridge_by_borough(nyc_clean, mean_ridership, "Mean Ridership")
library(egg)
# Borough-level distribution panels, four per row (the accessibility bar chart
# goes last).
ggarrange(
  plots = list(
    plot_1, plot_2, plot_3,
    plot_4, plot_5, plot_6,
    plot_7, plot_8, plot_9, plot_11, plot_12,
    plot_13, plot_10
  ),
  ncol = 4
)
# Transit maps, three per row.
ggarrange(
  plots = list(subway_loc, bus_loc, stops, bus_stops, ridership, access),
  ncol = 3
)
# Socioeconomic maps, three per row.
ggarrange(
  plots = list(red, orange, yellow, green, teal, blue, navy, purple, pink),
  ncol = 3
)